Note: This notebook generates about 40 MB of embedded output data, which should not be put under version control.
Please make sure to clear all cell output using the Cell > All Output > Clear menu command before committing changes!
Use Cell > Run All to reproduce the output.


In [ ]:
import numpy as np
import librosa
import mir_eval
import matplotlib.pyplot as plt
%matplotlib inline

import IPython.display
from IPython.display import display

import os, deepthought

from deepthought.datasets.openmiir.metadata import load_stimuli_metadata, save_beat_times

STIMULI_VERSION = 2   # change to 1 for older stimuli version
data_root = os.path.join(deepthought.DATA_PATH, 'OpenMIIR')
default_save_beat_times = False # change to True to save beat times to a txt file

def play_beats(y, sr, beats):
    
    if y is None:
        # Sonify the beats only
        y_beat = mir_eval.sonify.clicks(beats, sr)
    else:
        # Sonify the beats and add them to y
        y_beat = y + mir_eval.sonify.clicks(beats, sr, length=len(y))
    
    return IPython.display.Audio(data=y_beat, rate=sr)

def visualize(y, sr, title=None, playback=True, beats=None):
    
    # show playback widget above figure
    if playback:
        if title is not None:
            print title
        
        if beats is None:
            display(IPython.display.Audio(data=y, rate=sr))
        else:
            beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=64)
            display(play_beats(y, sr, beat_times))
    
    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    # We use a small hop length of 64 here so that the frames line up with the beat tracker example below.
    S = librosa.feature.melspectrogram(y, sr=sr, n_fft=2048, hop_length=64, n_mels=128)

    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.logamplitude(S, ref_power=np.max)

    # Make a new figure
    plt.figure(figsize=(12,4))

    # Display the spectrogram on a mel scale
    # sample rate and hop length parameters are used to render the time axis
    librosa.display.specshow(log_S, sr=sr, hop_length=64, x_axis='time', y_axis='mel')

    # Put a descriptive title on the plot
    if title is not None:
        plt.title('mel power spectrogram ({})'.format(title))

    if beats is not None:
        # Let's draw lines with a drop shadow on the beat events
        plt.vlines(beats, 0, log_S.shape[0], colors='k', linestyles='-', linewidth=2.5)
        plt.vlines(beats, 0, log_S.shape[0], colors='w', linestyles='-', linewidth=1.5)
        
    # draw a color bar
    plt.colorbar(format='%+02.0f dB')

    # Make the figure layout compact
    plt.tight_layout()

    # This makes sure the figures are plotted in place and not after the text and audio output
    plt.show()
    plt.close()
    

def _analyze_beats(audio_filepath, bpm, label=None, tightness=400, offset=0, duration=None, vy=True, vh=True, vp=True, vb=True):
    print audio_filepath
    # load audio file
#     sr = 22050  # default
    sr = 44100  # slower but gives better results for Harry Potter Theme
    y, sr = librosa.load(audio_filepath, sr=sr, offset=offset, duration=duration)
    
    if label is not None:
        print label
    
    if vy:
        visualize(y, sr, 'original')
    
    # split into harmonic and percussive component
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    
    if vh:
        visualize(y_harmonic, sr, 'harmonic component')
    if vp:
        visualize(y_percussive, sr, 'percussive component')
    
    # Now, let's run the beat tracker
    # We'll use the percussive component for this part
    # By default, the beat tracker will trim away any leading or trailing beats that don't appear strong enough.
    # To disable this behavior, call beat_track() with trim=False.

    tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr, hop_length=64, trim=False, start_bpm=bpm, tightness=tightness)

    # Let's re-draw the spectrogram, but this time, overlay the detected beats
    if vb:
        visualize(y, sr, 'with beats', beats=beats)

    print 'Offset:                 %.4f s' % offset
        
    print 'Expected tempo:         %.2f BPM' % bpm
    print 'Estimated tempo:        %.2f BPM' % tempo
    print 'First 5 beat frames:   ', beats[:5]

    # Frame numbers are great and all, but when do those beats occur?
    print 'First 5 beat times:    ', librosa.frames_to_time(beats[:5], sr=sr, hop_length=64)
    
    return tempo, beats, librosa.frames_to_time(beats, sr=sr, hop_length=64)

def get_audio_filepath(meta):
    return os.path.join(data_root, 'audio', 'full.v{}'.format(STIMULI_VERSION), meta['audio_file'])

def analyze_beats(meta, tightness=400, save=default_save_beat_times, **kwargs):
    tempo, beat_frames, beat_times = _analyze_beats(
        audio_filepath=get_audio_filepath(meta),
        label=meta['label'], 
        bpm=meta['bpm'], 
        tightness=tightness, 
        offset=meta['length_of_cue'],
        **kwargs
    )
    
    if save:
        offset = meta['length_of_cue']
        save_beat_times(beat_times, stimulus_id=meta['id'], offset=offset, version=STIMULI_VERSION)

    return tempo, beat_frames, beat_times
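
A quick aside (not in the original notebook): librosa represents beat positions as frame indices, and frames_to_time converts them to seconds via frames * hop_length / sr. A minimal sketch to make that relationship explicit:


In [ ]:
# hedged sketch: frame index -> time in seconds is just frames * hop_length / sr
frames = np.array([0, 100, 200])
print librosa.frames_to_time(frames, sr=44100, hop_length=64)
print frames * 64 / 44100.0  # same values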

In [ ]:
# NOTE: this is experimental
def analyze_onsets(meta):
    audio_filepath = get_audio_filepath(meta)
    sr = 44100  # slower but gives better results for Harry Potter Theme
    offset = meta['length_of_cue']
    duration = None
    y, sr = librosa.load(audio_filepath, sr=sr, offset=offset, duration=duration)
    """
    # Get onset times from a signal
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=64)
    onset_times = librosa.frames_to_time(onset_frames, sr, hop_length=64)

    # Or use a pre-computed onset envelope
    o_env = librosa.onset.onset_strength(y, sr=sr)
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
    onset_times = librosa.frames_to_time(onset_frames, sr, hop_length=64)
    """
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=64)
    print onset_frames
    visualize(y, sr, 'with onsets', beats=onset_frames)
    
    o_env = librosa.onset.onset_strength(y, sr=sr)
    plt.plot(o_env)
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
    print onset_frames
    # onset_strength/onset_detect used librosa's default hop_length of 512 here,
    # while visualize() assumes a hop_length of 64, so scale the frame indices by 512/64 = 8
    visualize(y, sr, 'with onsets', beats=onset_frames*8)
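
The factor of 8 above comes from the hop-length mismatch: onset_strength and onset_detect default to hop_length=512, while visualize assumes 64. A hedged sketch (with made-up example frame values) of converting frame indices between hop lengths by going through time:


In [ ]:
# hedged sketch: convert hop-512 frame indices to hop-64 frame indices via time
frames_512 = np.array([10, 20, 30])
times = librosa.frames_to_time(frames_512, sr=44100, hop_length=512)
print librosa.time_to_frames(times, sr=44100, hop_length=64)  # == frames_512 * 8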

In [ ]:
meta = load_stimuli_metadata(data_root, version=STIMULI_VERSION)
# print meta

some examples


In [ ]:
# run this to analyze onsets for stimulus 22
analyze_onsets(meta[22])

In [ ]:
# run this to analyze onsets for stimulus 1, specify tightness
tempo, beat_frames, beat_times = analyze_beats(meta[1], tightness=800)
print beat_times
print beat_frames
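
As a quick sanity check (not part of the original notebook), the tempo implied by the median inter-beat interval should roughly match the estimated tempo returned by the beat tracker:


In [ ]:
# hedged sanity check: tempo implied by the median inter-beat interval
ibis = np.diff(beat_times)  # inter-beat intervals in seconds
print 'median-IBI tempo: %.2f BPM' % (60.0 / np.median(ibis))
print 'estimated tempo:  %.2f BPM' % tempo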

In [ ]:
# analyze beginning of stimulus 22, 
# suppress visualization of the original signal (vy) and the harmonic (vh) and percussive (vp) components
_analyze_beats(get_audio_filepath(meta[22]), 
               bpm=166, tightness=250, offset=2.182, duration=4.0, vy=False, vh=False, vp=False);

In [ ]:
# test different tightness settings on stimulus 22
_analyze_beats(get_audio_filepath(meta[22]), 
               bpm=166, tightness=250, offset=0, duration=None, vy=False, vh=False, vp=False);
_analyze_beats(get_audio_filepath(meta[22]), 
               bpm=166, tightness=400, offset=0, duration=None, vy=False, vh=False, vp=False);
_analyze_beats(get_audio_filepath(meta[22]), 
               bpm=166, tightness=800, offset=0, duration=None, vy=False, vh=False, vp=False);
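
The same comparison can also be written as a loop; a sketch, with vb=False added to suppress the beat spectrograms as well, so only the printed summaries remain:


In [ ]:
# hedged sketch: sweep tightness settings in a loop instead of repeating the call
for tightness in [250, 400, 800]:
    _analyze_beats(get_audio_filepath(meta[22]),
                   bpm=166, tightness=tightness, offset=0, duration=None,
                   vy=False, vh=False, vp=False, vb=False);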

analyze stimuli


In [ ]:
_ = analyze_beats(meta[1], tightness=1000, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[2], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[3], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[4], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[11], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[12], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[13], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[14], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[21], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[22], tightness=300, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[23], tightness=800, vy=False, vh=False, vp=False)

In [ ]:
_ = analyze_beats(meta[24], tightness=800, vy=False, vh=False, vp=False)

analyze cue click tracks


In [ ]:
from deepthought.datasets.openmiir.constants import STIMULUS_IDS

for stimulus_id in STIMULUS_IDS:
    tempo, beat_frames, beat_times = _analyze_beats(
        audio_filepath=os.path.join(data_root, 'audio', 'cues.v{}'.format(STIMULI_VERSION), meta[stimulus_id]['cue_file']), 
        label=meta[stimulus_id]['label'], 
        bpm=meta[stimulus_id]['cue_bpm'], 
        tightness=10000, vy=False, vh=False, vp=False
    )
    
    if default_save_beat_times:
        save_beat_times(beat_times, stimulus_id=stimulus_id, cue=True, version=STIMULI_VERSION)
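
As an optional check (not in the original notebook), the detected cue beats can be scored against an ideal metronome grid using mir_eval, assuming the cue click track is perfectly regular at cue_bpm. This sketch uses beat_times and stimulus_id from the last loop iteration above:


In [ ]:
# hedged sketch: compare detected cue beats to an ideal regular grid at cue_bpm
bpm = meta[stimulus_id]['cue_bpm']
reference_beats = beat_times[0] + np.arange(len(beat_times)) * 60.0 / bpm
print 'F-measure vs. ideal grid: %.3f' % mir_eval.beat.f_measure(reference_beats, beat_times)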